source("Scripts/libraries.R")
source("Scripts/variables.R")
source("Scripts/functions.R")rm(list=ls())
source("Scripts/libraries.R")
source("Scripts/variables.R")
source("Scripts/functions.R")load(paste0(IO$sympto_data_02_processed,'cycledays.Rdata'), verbose = TRUE)## Loading objects:
## cycledays
agg = aggregate(cycle_nb ~ user_id, cycledays,lu)
j = which(agg$cycle_nb >= 50)
user_ids = agg$user_id[j]
rm(j, agg)for(u in user_ids[1:10]){
k = which(cycledays$user_id == u)
plot.tracking.history(d = cycledays[k,], show_goal = TRUE, relative_date = TRUE)
}rm(u, k)u = 1226
k = which(cycledays$user_id == u)
plot.tracking.history(d = cycledays[k,], show_goal = TRUE, relative_date = TRUE)cycledays_sympto_user_history = cycledays[k,]
save(cycledays_sympto_user_history,file = paste0(IO$restricted_figure_data,"history_sympto.Rdata"))
rm(u, k, cycledays_sympto_user_history, cycledays, user_ids)source("Scripts/_setup_Kindara.R")rm(list=ls())
source("Scripts/libraries.R")
source("Scripts/variables.R")
source("Scripts/functions.R")load(file = paste0(IO$kindara_data_accounts,'processed_cleaned_accounts_onefile.Rdata'), verbose = TRUE)## Loading objects:
## accounts
round(mean(accounts$age_registration, na.rm = TRUE), digits = 1)## [1] 29.2
round(sd(accounts$age_registration, na.rm = TRUE), digits = 1)## [1] 5.9
round(100 - (sum(is.na(accounts$age_registration))/nrow(accounts)*100))## [1] 13
users_demo_kindara = accounts[,c("age_registration","age_now")]
save(users_demo_kindara,file = paste0(IO$restricted_figure_data,"users_demo_kindara.Rdata"))
rm(accounts, users_demo_kindara)source("Scripts/_setup_Sympto.R")load(paste0(IO$sympto_data_02_processed,'users.Rdata'), verbose = TRUE)## Loading objects:
## users
users_demo = users[,c("age_registration","age_now","menarche_year","cm","kg","bmi")]
users_demo$cm_o = users_demo$cm
users_demo$cm[which(users_demo$cm < 100)] = 100 + users_demo$cm[which(users_demo$cm < 100)]
users_demo$bmi_o = users_demo$bmi
users_demo$bmi = users$kg / ((users_demo$cm/100)^2)
summary(users_demo)## age_registration age_now menarche_year cm
## Min. : 0.00 Min. : 0.00 Min. : 8.00 Min. :100.0
## 1st Qu.:26.00 1st Qu.:29.00 1st Qu.:12.00 1st Qu.:160.0
## Median :29.00 Median :33.00 Median :13.00 Median :165.0
## Mean :29.94 Mean :33.52 Mean :12.85 Mean :165.3
## 3rd Qu.:34.00 3rd Qu.:38.00 3rd Qu.:14.00 3rd Qu.:170.0
## Max. :74.00 Max. :77.00 Max. :18.00 Max. :229.0
## NA's :2570 NA's :2570 NA's :2819 NA's :2735
## kg bmi cm_o bmi_o
## Min. : 20.00 Min. : 7.703 Min. : 50.0 Min. : 7.703
## 1st Qu.: 54.00 1st Qu.: 19.818 1st Qu.:160.0 1st Qu.:19.818
## Median : 60.00 Median : 21.631 Median :165.0 Median :21.630
## Mean : 62.23 Mean : 22.788 Mean :161.3 Mean :22.840
## 3rd Qu.: 67.00 3rd Qu.: 24.342 3rd Qu.:170.0 3rd Qu.:24.314
## Max. :149.00 Max. :103.939 Max. :229.0 Max. :86.565
## NA's :2765 NA's :2786 NA's :2735 NA's :3214
round(apply(users_demo, 2, sd, na.rm = TRUE), digits = 2)## age_registration age_now menarche_year cm
## 6.36 6.69 1.61 6.73
## kg bmi cm_o bmi_o
## 13.14 4.76 21.25 5.02
100 - round(apply(users_demo, 2, function(x) sum(is.na(x)))/nrow(users)*100)## age_registration age_now menarche_year cm
## 81 81 79 80
## kg bmi cm_o bmi_o
## 80 80 80 76
users_demo_sympto = users_demo
save(users_demo_sympto,file = paste0(IO$restricted_figure_data,"users_demo_sympto.Rdata"))
rm(users, users_demo, users_demo_sympto)nothing to be done - everything is in the FAM_figures.Rmd
source("Scripts/_setup_Kindara.R")folder = paste0(IO$kindara_data_days,"03 selected/")
days.file.list = list.files(folder)
days.file.list = paste0(folder,days.file.list )
rm(folder)cycleday_from_end = -28:-1
breaks = seq(-0.6,2,by = 0.1)-0.05
mids = breaks[2:length(breaks)]-0.05
registerDoParallel(par$n_cores)
tic()
temp.hist = foreach(days.file = days.file.list,.combine = combine.sum) %dopar%
{hist.by.cycleday(file = days.file,
attr = 'temp_diff_from_p25',
breaks = breaks,
from_start = FALSE,
cycleday_v = cycleday_from_end)}
toc()## 42.615 sec elapsed
colnames(temp.hist) = cycleday_from_end
n.tot = colSums(temp.hist)
temp.hist.norm = t(t(temp.hist)/ n.tot )
delta_BBT_distribution_kindara = data.frame(cycleday = cycleday_from_end,
d = t(temp.hist.norm))
colnames(delta_BBT_distribution_kindara) = c('cycleday',round(mids, digits = 2))
write.csv(delta_BBT_distribution_kindara,
file = paste0(IO$output_csv, 'delta_BBT_distribution_kindara.csv'),
row.names = FALSE)
save(delta_BBT_distribution_kindara, file = paste0(IO$output_Rdata, "delta_BBT_distribution_kindara.Rdata"))
n.cum = apply(temp.hist, 2, cumsum)
n.med = sapply(X = 1:length(cycleday_from_end),which.quant, pc = 0.5, n.cum = n.cum, n.tot = n.tot)
med = mids[n.med]
n.10 = sapply(X = 1:length(cycleday_from_end),which.quant, pc = 0.1, n.cum = n.cum, n.tot = n.tot)
n.25 = sapply(X = 1:length(cycleday_from_end),which.quant, pc = 0.25, n.cum = n.cum, n.tot = n.tot)
n.75 = sapply(X = 1:length(cycleday_from_end),which.quant, pc = 0.75, n.cum = n.cum, n.tot = n.tot)
n.90 = sapply(X = 1:length(cycleday_from_end),which.quant, pc = 0.9, n.cum = n.cum, n.tot = n.tot)
t.10 = mids[n.10]
t.25 = mids[n.25]
t.75 = mids[n.75]
t.90 = mids[n.90]
delta_BBT_summary_kindara = data.frame(cycleday_from_end = cycleday_from_end,
p.10 = round(t.10, digits = 2),
p.25 = round(t.25, digits = 2),
median = round(med, digits = 2),
p.75 = round(t.75, digits = 2),
p.90 = round(t.90, digits = 2))
write.csv(delta_BBT_summary_kindara,
file = paste0(IO$output_csv, 'delta_BBT_summary_kindara.csv'),
row.names = FALSE)
save(delta_BBT_summary_kindara, file = paste0(IO$output_Rdata, "delta_BBT_summary_kindara.Rdata"))
rm(delta_BBT_summary_kindara, t.90, t.75, t.25, t.10, n.90, n.75, n.25,n.10, med, n.med,delta_BBT_distribution_kindara,temp.hist.norm, n.tot,mids, breaks, cycleday_from_end, n.cum, temp.hist)breaks = c(-0.25,dict$bleeding$index +0.25)
cycleday_from_end = -15:-1
cycleday = 1:15
tic()
bleeding.hist.end = foreach(days.file = days.file.list,.combine = combine.sum) %dopar%
{hist.by.cycleday(file = days.file,
attr = 'bleeding',
breaks = breaks,
from_start = FALSE,
cycleday_v = cycleday_from_end)}
toc()## 18.849 sec elapsed
tic()
bleeding.hist.start = foreach(days.file = days.file.list,.combine = combine.sum) %dopar%
{hist.by.cycleday(file = days.file,
attr = 'bleeding',
breaks = breaks,
from_start = TRUE,
cycleday_v = cycleday)}
toc()## 17.946 sec elapsed
n.cycles = max(apply(bleeding.hist.start,2,sum))
bleeding.hist.start.norm = bleeding.hist.start / n.cycles
bleeding.hist.end.norm = bleeding.hist.end / n.cycles
bleeding_distribution_kindara = data.frame(
cycleday = c(cycleday_from_end, cycleday),
no_bleeding_reported = c(as.numeric(bleeding.hist.end.norm[1,]),as.numeric(bleeding.hist.start.norm[1,])),
spotting = c(as.numeric(bleeding.hist.end.norm[2,]),as.numeric(bleeding.hist.start.norm[2,])),
light_bleeding = c(as.numeric(bleeding.hist.end.norm[3,]),as.numeric(bleeding.hist.start.norm[3,])),
medium_bleeding = c(as.numeric(bleeding.hist.end.norm[4,]),as.numeric(bleeding.hist.start.norm[4,])),
heavy_bleeding = c(as.numeric(bleeding.hist.end.norm[5,]),as.numeric(bleeding.hist.start.norm[5,]))
)
write.csv(bleeding_distribution_kindara,
file = paste0(IO$output_csv, 'bleeding_distribution_kindara.csv'),
row.names = FALSE)
save(bleeding_distribution_kindara, file = paste0(IO$output_Rdata, "bleeding_distribution_kindara.Rdata"))
rm(bleeding.hist.start.norm, bleeding.hist.end.norm, bleeding.hist.start, bleeding.hist.end, cycleday_from_end, cycleday, bleeding_distribution_kindara)breaks = 0:13-0.5
mids = breaks[2:length(breaks)]-0.5
cycleday_from_end = -28:-1
tic()
mucus.hist = foreach(days.file = days.file.list,.combine = combine.sum) %dopar%
{hist.by.cycleday(file = days.file,
attr = 'mucus',
breaks = breaks,
from_start = FALSE,
cycleday_v = cycleday_from_end)}
toc()## 19.622 sec elapsed
mucus.hist.norm = mucus.hist/n.cycles
cervical_mucus_distribution_kindara = data.frame(cycleday = cycleday_from_end,
m = t(mucus.hist.norm))
colnames(cervical_mucus_distribution_kindara) = c('cycleday',dict$mucus$names[2:14])
write.csv(cervical_mucus_distribution_kindara,
file = paste0(IO$output_csv, 'cervical_mucus_distribution_kindara.csv'),
row.names = FALSE)
save(cervical_mucus_distribution_kindara, file = paste0(IO$output_Rdata, "cervical_mucus_distribution_kindara.Rdata"))
rm(mucus.hist, mucus.hist.norm, breaks, mids, cycleday_from_end, cervical_mucus_distribution_kindara)breaks = 1:4-0.5
cycleday_from_end = -28:-1
tic()
cervix.openness.hist = foreach(days.file = days.file.list,.combine = combine.sum) %dopar%
{hist.by.cycleday(file = days.file,
attr = 'cervix_openness',
breaks = breaks,
from_start = FALSE,
cycleday_v = cycleday_from_end)}
toc()## 18.239 sec elapsed
cervix.openness.hist.norm = cervix.openness.hist/n.cycles
tic()
cervix.height.hist = foreach(days.file = days.file.list,.combine = combine.sum) %dopar%
{hist.by.cycleday(file = days.file,
attr = 'cervix_height',
breaks = breaks,
from_start = FALSE,
cycleday_v = cycleday_from_end)}
toc()## 16.812 sec elapsed
cervix.height.hist.norm = cervix.height.hist/n.cycles
tic()
cervix.firmness.hist = foreach(days.file = days.file.list,.combine = combine.sum) %dopar%
{hist.by.cycleday(file = days.file,
attr = 'cervix_firmness',
breaks = breaks,
from_start = FALSE,
cycleday_v = cycleday_from_end)}
toc()## 19.475 sec elapsed
cervix.firmness.hist.norm = cervix.firmness.hist/n.cycles
cervix_distribution_kindara = data.frame(cycleday = cycleday_from_end,
o = t(cervix.openness.hist.norm),
f = t(cervix.firmness.hist.norm),
h = t(cervix.height.hist.norm)
)
colnames(cervix_distribution_kindara) = c('cycleday','o_closed','o_medium','o_open',
'f_firm','f_medium','f_soft',
'h_low','h_medium','h_high')
write.csv(cervix_distribution_kindara,
file = paste0(IO$output_csv, 'cervix_distribution_kindara.csv'),
row.names = FALSE)
save(cervix_distribution_kindara, file = paste0(IO$output_Rdata, "cervix_distribution_kindara.Rdata"))
rm(cervix_distribution_kindara, cervix.firmness.hist.norm, cervix.firmness.hist, cervix.height.hist.norm, cervix.height.hist, cervix.openness.hist.norm, cervix.openness.hist, breaks, cycleday_from_end )breaks = 1:5-0.5
cycleday_from_end = -28:-1
tic()
vaginal_sensation.hist = foreach(days.file = days.file.list,.combine = combine.sum) %dopar%
{hist.by.cycleday(file = days.file,
attr = 'vaginal_sensation',
breaks = breaks,
from_start = FALSE,
cycleday_v = cycleday_from_end)}
toc()## 17.022 sec elapsed
vaginal_sensation.hist.norm = vaginal_sensation.hist/n.cycles
vaginal_sensation_distribution_kindara = data.frame(cycleday = cycleday_from_end,
s = t(vaginal_sensation.hist.norm))
colnames(vaginal_sensation_distribution_kindara) = c('cycleday',dict$feel$names[3:6])
write.csv(vaginal_sensation_distribution_kindara,
file = paste0(IO$output_csv, 'vaginal_sensation_distribution_kindara.csv'),
row.names = FALSE)
save(vaginal_sensation_distribution_kindara, file = paste0(IO$output_Rdata, "vaginal_sensation_distribution_kindara.Rdata"))
rm(vaginal_sensation_distribution_kindara, vaginal_sensation.hist, vaginal_sensation.hist.norm, breaks, cycleday_from_end)rm(days.file.list, n.cycles)source("Scripts/_setup_Sympto.R")load(paste0(IO$sympto_data_02_standard_cycles_with_HMM_res,"cycledays.Rdata"), verbose = TRUE)## Loading objects:
## cycledays
CC = cycledays
n.days.in.cycle = 28
i.end = which(CC$cycleday_from_end >= -n.days.in.cycle)
i.start = which(CC$out_cycleday <= n.days.in.cycle)
cc = CC[i.end,]
uu = unique(cbind(cc$cycleday_from_end, cc$low_norm_temp), by = c('cycleday_from_end','low_norm_temp'))
uuu = aggregate(cc[,c('cycleday_from_end','low_norm_temp')], by = list(cc$cycleday_from_end,cc$low_norm_temp), length)
uuu = uuu[,1:3]
colnames(uuu) = c('cycleday_from_end','low_norm_temp','n')
uuu$density = uuu$n/max(uuu$n)
delta_BBT_distribution_sympto = uuu[order(uuu$cycleday_from_end,uuu$low_norm_temp),-3]
write.csv(delta_BBT_distribution_sympto,
file = paste0(IO$output_csv, 'delta_BBT_distribution_sympto.csv'),
row.names = FALSE)
save(delta_BBT_distribution_sympto, file = paste0(IO$output_Rdata,"delta_BBT_distribution_sympto.Rdata") )
quant = aggregate(low_norm_temp ~ cycleday_from_end, cc, quantile, seq(0,1,by=0.05))
delta_BBT_summary_sympto = data.frame(cycleday = quant$cycleday_from_end,
p.10 = round(quant$low_norm_temp[,3], digits = 3),
p.25 = round(quant$low_norm_temp[,6], digits = 3),
median = round(quant$low_norm_temp[,11], digits = 3),
p.75 = round(quant$low_norm_temp[,16], digits = 3),
p.90 = round(quant$low_norm_temp[,19], digits = 3)
)
write.csv(delta_BBT_summary_sympto,
file = paste0(IO$output_csv, 'delta_BBT_summary_sympto.csv'),
row.names = FALSE)
save(delta_BBT_summary_sympto, file = paste0(IO$output_Rdata,"delta_BBT_summary_sympto.Rdata") )
rm(delta_BBT_distribution_sympto, delta_BBT_summary_sympto, quant, uu, uuu, cc, i.end, i.start, n.days.in.cycle)obs = 'b'
n.days.in.cycle = 15
i.end = which(CC$cycleday_from_end >= -n.days.in.cycle)
i.start = which(CC$out_cycleday <= n.days.in.cycle)
#bleeding from start
cc = CC[i.start,]
N = length(unique(cc$cycle_id))
breaks = seq(0.5, n.days.in.cycle+0.5, by = 1)
for(b in c(1,2,3)){
h = hist(cc$out_cycleday[cc$blood == b], breaks = breaks, plot = FALSE)
res = data.frame(out_cycleday = h$mids, blood = paste0(obs,'.',b),freq = h$counts/N )
if(b == 1){resB = res}else{resB = rbind(resB, res)}
}
resB$blood = factor(resB$blood)
resB = cast(resB, out_cycleday ~ blood )## Using freq as value column. Use the value argument to cast to override this choice
colnames(resB) = c('cycleday','light_bleeding','medium_bleeding','high_bleeding')
resB.start = resB
#bleeding from end
cc = CC[i.end,]
N = length(unique(cc$cycle_id))
breaks = seq(-n.days.in.cycle-0.5, -0.5, by = 1)
for(b in c(1,2,3)){
h = hist(cc$cycleday_from_end[cc$blood == b], breaks = breaks, plot = FALSE)
res = data.frame(out_cycleday = h$mids, blood = paste0(obs,'.',b),freq = h$counts/N)
if(b == 1){resB = res}else{resB = rbind(resB, res)}
}
resB$blood = factor(resB$blood)
resB = cast(resB, out_cycleday ~ blood )## Using freq as value column. Use the value argument to cast to override this choice
colnames(resB) = c('cycleday','light_bleeding','medium_bleeding','high_bleeding')
resB.end = resB
resB = rbind(resB.end, resB.start)
bleeding_distribution_sympto = resB
write.csv(bleeding_distribution_sympto,
file = paste0(IO$output_csv, 'bleeding_distribution_sympto.csv'),
row.names = FALSE)
save(bleeding_distribution_sympto, file = paste0(IO$output_Rdata, 'bleeding_distribution_sympto.Rdata'))
rm(res, resB, resB.end, resB.start, bleeding_distribution_sympto, N, b, n.days.in.cycle, i.start, i.end, h, cc, obs, breaks)n.days.in.cycle = 28
i.end = which(CC$cycleday_from_end >= -n.days.in.cycle)
i.start = which(CC$out_cycleday <= n.days.in.cycle)
obs = 'm'
#mucus from end
cc = CC[i.end,]
N = length(unique(cc$cycle_id))
breaks = seq(-n.days.in.cycle-0.5, -0.5, by = 1)
for(m in c(1,2,3,4)){
h = hist(cc$cycleday_from_end[cc$elixir == m], breaks = breaks, plot = FALSE)
res = data.frame(out_cycleday = h$mids, mucus = paste0(obs,'.',m),freq = h$counts/N )
if(m == 1){resM = res}else{resM = rbind(resM, res)}
}
resM$mucus = factor(resM$mucus )
resM = cast(resM, out_cycleday ~ mucus )## Using freq as value column. Use the value argument to cast to override this choice
colnames(resM) = c('cycleday',dict$mucus$hmm.symbols[2:5])
cervical_mucus_distribution_sympto = resM
write.csv(cervical_mucus_distribution_sympto,
file = paste0(IO$output_csv, 'cervical_mucus_distribution_sympto.csv'),
row.names = FALSE)
save(cervical_mucus_distribution_sympto, file = paste0(IO$output_Rdata, 'cervical_mucus_distribution_sympto.Rdata'))
rm(res, resM, cervical_mucus_distribution_sympto, N, m, n.days.in.cycle, i.start, i.end, h, cc, obs, breaks)obs = 'c'
n.days.in.cycle = 28
i.end = which(CC$cycleday_from_end >= -n.days.in.cycle)
i.start = which(CC$out_cycleday <= n.days.in.cycle)
cc = CC[i.end,]
N = length(unique(cc$cycle_id))
breaks = seq(-n.days.in.cycle-0.5, -0.5, by = 1)
for(c in c(11,12,13)){
h = hist(cc$cycleday_from_end[cc$feel == c], breaks = breaks, plot = FALSE)
res = data.frame(out_cycleday = h$mids, cervix = paste0(obs,'.',c),freq = h$counts/N )
if(c == 11){resC = res}else{resC = rbind(resC, res)}
}
resC$cervix = factor(resC$cervix )
resC = cast(resC, out_cycleday ~ cervix )## Using freq as value column. Use the value argument to cast to override this choice
colnames(resC) = c('cycleday',dict$cervix$names[5:7])
cervix_distribution_sympto = resC
write.csv(cervix_distribution_sympto,
file = paste0(IO$output_csv, 'cervix_distribution_sympto.csv'),
row.names = FALSE)
save(cervix_distribution_sympto, file = paste0(IO$output_Rdata, 'cervix_distribution_sympto.Rdata'))
rm(res, resC, cervix_distribution_sympto, N, c, n.days.in.cycle, i.start, i.end, h, cc, obs, breaks)obs = "c"
n.days.in.cycle = 28
i.end = which(CC$cycleday_from_end >= -n.days.in.cycle)
i.start = which(CC$out_cycleday <= n.days.in.cycle)
cc = CC[i.end,]
N = length(unique(cc$cycle_id))
breaks = seq(-n.days.in.cycle-0.5, -0.5, by = 1)
for(c in c(1,2,3)){
h = hist(cc$cycleday_from_end[cc$feel == c], breaks = breaks, plot = FALSE)
res = data.frame(out_cycleday = h$mids, cervix = paste0(obs,'.',c),freq = h$counts/N )
if(c == 1){resC = res}else{resC = rbind(resC, res)}
}
resC$cervix = factor(resC$cervix )
resC = cast(resC, out_cycleday ~ cervix )## Using freq as value column. Use the value argument to cast to override this choice
colnames(resC) = c('cycleday',dict$cervix$names[2:4])
vaginal_sensation_distribution_sympto = resC
write.csv(vaginal_sensation_distribution_sympto,
file = paste0(IO$output_csv, 'vaginal_sensation_distribution_sympto.csv'),
row.names = FALSE)
save(vaginal_sensation_distribution_sympto, file = paste0(IO$output_Rdata, 'vaginal_sensation_distribution_sympto.Rdata'))
rm(res, resC, vaginal_sensation_distribution_sympto, N, c, n.days.in.cycle, i.start, i.end, h, cc, obs, breaks)source("Scripts/_setup_Kindara.R")cycle.file = paste0(IO$kindara_data_cycles,"06 CYCLES with reliable ovulation estimation.Rdata") file = paste0(IO$kindara_data_cycles,"05 standard with ovu est CYCLES.Rdata")
load(file, verbose = TRUE)
CYCLES_all = CYCLES
CYCLES = CYCLES[which(CYCLES$reliable.ovu.est),]
save(CYCLES, file = cycle.file)
rm(CYCLES_all)breaks = seq(0,63,by = 1)+0.5
mids = breaks[2:length(breaks)]-0.5
length.hist = hist.cycles.par(file = cycle.file,
attr = 'length',
breaks = breaks)## ../../FAM-Restricted-Access-Repo/Data/Kindara/Cycles/06 CYCLES with reliable ovulation estimation.Rdata
## 80663
## 80356
## 0.996
n.cycles = sum(length.hist)
length.hist.norm = length.hist/n.cycles
# ovulation time
ovu.hist = hist.cycles.par(file = cycle.file,
attr = 'ovu',
breaks = breaks)## ../../FAM-Restricted-Access-Repo/Data/Kindara/Cycles/06 CYCLES with reliable ovulation estimation.Rdata
## 80663
## 80545
## 0.999
ovu.hist.norm = ovu.hist/n.cycles
# luteal duration
lut.hist = hist.cycles.par(file = cycle.file,
attr = 'luteal.duration',
breaks = breaks)## ../../FAM-Restricted-Access-Repo/Data/Kindara/Cycles/06 CYCLES with reliable ovulation estimation.Rdata
## 80663
## 80640
## 1
lut.hist.norm = lut.hist/n.cycles
plot(mids, length.hist.norm, type = 'l', lwd = 3, col = viz_cols$dark.red, ylim = range(length.hist.norm,ovu.hist.norm,lut.hist.norm))
points(mids, ovu.hist.norm, type = 'l', lwd = 3, col = "black")
points(mids, lut.hist.norm, type = 'l', lwd = 3, col = viz_cols$dark.yellow)
abline(v = 28)####
cycle_phases_duration_distribution_kindara = data.frame(duration = mids,
cycle_length = length.hist.norm,
estimated_ovulation = ovu.hist.norm,
estimated_luteal_phase = lut.hist.norm)
write.csv(cycle_phases_duration_distribution_kindara,
file = paste0(IO$output_csv, 'cycle_phases_duration_distribution_kindara.csv'),
row.names = FALSE)
save(cycle_phases_duration_distribution_kindara, file = paste0(IO$output_Rdata, 'cycle_phases_duration_distribution_kindara.Rdata') )source("Scripts/_setup_Sympto.R")cycle.file = paste0(IO$sympto_data_03_reliable_ovulation_estimation,"cycles.Rdata")file = paste0(IO$sympto_data_02_standard_cycles_with_HMM_res,"cycles.Rdata")
load(file, verbose = TRUE)
cycles = cycles[which(cycles$reliable.ovu.est),]
save(cycles, file = cycle.file)load(cycle.file, verbose = TRUE)## Loading objects:
## cycles
at.y = seq(0,0.22,by = 0.02)
at.x = seq(0,65,by = 7)
x = cycles$cycle_length
h = plot_hist(x = x, bin.size = 1,
at.x = seq(0,65,by = 7), at.y = at.y,
xlab = 'Cycle length',ylab = '% of cycles',
col = 'tomato2', border = 'tomato3', bars = FALSE)h.cycle_length = h
x = cycles$ovu
h = plot_hist(x = x, bin.size = 1,
at.x = seq(0,65,by = 7), at.y = at.y,
xlab = 'Estimated ovulation day',ylab = '% of cycles',
col = 'gray30', border = 'gray25', bars = FALSE)h.ovu = h
x = cycles$luteal.duration
h = plot_hist(x = x, bin.size = 1,
at.x = seq(0,65,by = 7), at.y = at.y,
xlab = 'Estimated luteal phase duration',ylab = '% of cycles',
col = viz_cols$yellow, border = viz_cols$dark.yellow, bars = FALSE)h.lut = h
breaks = seq(0,63,by = 1)+0.5
mids = breaks[2:length(breaks)]-0.5
cycle_phases_duration_distribution_sympto = data.frame(duration = mids)
m = match(cycle_phases_duration_distribution_sympto$duration, h.cycle_length$mids)
cycle_phases_duration_distribution_sympto$cycle_length = h.cycle_length$density[m]
m = match(cycle_phases_duration_distribution_sympto$duration, h.ovu$mids)
cycle_phases_duration_distribution_sympto$estimated_ovulation = h.ovu$density[m]
m = match(cycle_phases_duration_distribution_sympto$duration, h.lut$mids)
cycle_phases_duration_distribution_sympto$estimated_luteal_phase = h.lut$density[m]
# by goals
contra = which((cycles$reliable.ovu.est) & (cycles$goal_txt == 'Contraception'))
concep = which((cycles$reliable.ovu.est) & (cycles$goal_txt == 'Conception'))
x = cycles$cycle_length[contra]
h.cl.contra = plot_hist(x = x, bin.size = 1,
at.x = seq(0,65,by = 7), at.y = at.y,
xlab = 'Cycle length',ylab = '% of cycles',
col = viz_cols$yellow, border = viz_cols$dark.yellow)x = cycles$cycle_length[concep]
h.cl.concep = plot_hist(x = x, bin.size = 1,
at.x = seq(0,65,by = 7), at.y = at.y,
xlab = 'Cycle length',ylab = '% of cycles',
col = viz_cols$yellow, border = viz_cols$dark.yellow)x = cycles$ovu[contra]
h.ovu.contra = plot_hist(x = x, bin.size = 1,
at.x = seq(0,65,by = 7), at.y = at.y,
xlab = 'Estimated ovulation day',ylab = '% of cycles',
col = viz_cols$yellow, border = viz_cols$dark.yellow)x = cycles$ovu[concep]
h.ovu.concep = plot_hist(x = x, bin.size = 1,
at.x = seq(0,65,by = 7), at.y = at.y,
xlab = 'Estimated ovulation day',ylab = '% of cycles',
col = viz_cols$yellow, border = viz_cols$dark.yellow)x = cycles$luteal.duration[contra]
h.lut.contra = plot_hist(x = x, bin.size = 1,
at.x = seq(0,65,by = 7), at.y = at.y,
xlab = 'Estimated luteal phase duration',ylab = '% of cycles',
col = viz_cols$yellow, border = viz_cols$dark.yellow)x = cycles$luteal.duration[concep]
h.lut.concep = plot_hist(x = x, bin.size = 1,
at.x = seq(0,65,by = 7), at.y = at.y,
xlab = 'Estimated luteal phase duration',ylab = '% of cycles',
col = viz_cols$yellow, border = viz_cols$dark.yellow)m = match(cycle_phases_duration_distribution_sympto$duration, h.cl.contra$mids)
cycle_phases_duration_distribution_sympto$cycle_length_avoid_preg = h.cl.contra$density[m]
m = match(cycle_phases_duration_distribution_sympto$duration, h.cl.concep$mids)
cycle_phases_duration_distribution_sympto$cycle_length_seek_preg = h.cl.concep$density[m]
m = match(cycle_phases_duration_distribution_sympto$duration, h.ovu.contra$mids)
cycle_phases_duration_distribution_sympto$estimated_ovulation_avoid_preg = h.ovu.contra$density[m]
m = match(cycle_phases_duration_distribution_sympto$duration, h.ovu.concep$mids)
cycle_phases_duration_distribution_sympto$estimated_ovulation_seek_preg = h.ovu.concep$density[m]
m = match(cycle_phases_duration_distribution_sympto$duration, h.lut.contra$mids)
cycle_phases_duration_distribution_sympto$estimated_luteal_phase_avoid_preg = h.lut.contra$density[m]
m = match(cycle_phases_duration_distribution_sympto$duration, h.lut.concep$mids)
cycle_phases_duration_distribution_sympto$estimated_luteal_phase_seek_preg = h.lut.concep$density[m]
test = cycle_phases_duration_distribution_sympto
test[is.na(test)] = 0
cycle_phases_duration_distribution_sympto = test
write.csv(cycle_phases_duration_distribution_sympto,
file = paste0(IO$output_csv, 'cycle_phases_duration_distribution_sympto.csv'),
row.names = FALSE)
save(cycle_phases_duration_distribution_sympto,file = paste0(IO$output_Rdata, 'cycle_phases_duration_distribution_sympto.Rdata') )source("Scripts/_setup_Kindara.R")cycle.file = paste0(IO$kindara_data_cycles,"05 standard with ovu est CYCLES.Rdata")
load(cycle.file, verbose = TRUE)## Loading objects:
## CYCLES
# DELTA T
########################
by = 0.1
mids = seq(0,2,by = by)
breaks = c(mids[1]-by/2, mids + by/2)
DT.hist = hist.cycles.par(file = cycle.file,
attr = 'D.T',
breaks = breaks,
filter = FALSE)## ../../FAM-Restricted-Access-Repo/Data/Kindara/Cycles/05 standard with ovu est CYCLES.Rdata
## 719182
## 523206
## 0.728
n.cycles = sum(DT.hist)
DT.hist.norm = DT.hist/n.cycles
temperature_shift_distribution_kindara = data.frame(temperature_shifts = mids,
fraction_of_cycles = DT.hist.norm)
write.csv(temperature_shift_distribution_kindara,
file = paste0(IO$output_csv, 'temperature_shift_distribution_kindara.csv'),
row.names = FALSE)
save(temperature_shift_distribution_kindara,
file = paste0(IO$output_Rdata, 'temperature_shift_distribution_kindara.Rdata'))
# SD on OVU ESTIMATION
########################
by = 0.1
mids = seq(0,10,by = by)
breaks = c(mids[1]-by/2, mids + by/2)
sd.hist = hist.cycles.par(file = cycle.file,
attr = 'ovu.sd',
breaks = breaks,
filter = FALSE)## ../../FAM-Restricted-Access-Repo/Data/Kindara/Cycles/05 standard with ovu est CYCLES.Rdata
## 719182
## 522148
## 0.726
n.cycles = sum(sd.hist)
sd.hist.norm = sd.hist/n.cycles
uncertainties_distribution_kindara = data.frame(uncertainties = mids,
fraction_of_cycles = sd.hist.norm)
write.csv(uncertainties_distribution_kindara,
file = paste0(IO$output_csv, 'uncertainties_distribution_kindara.csv'),
row.names = FALSE)
save(uncertainties_distribution_kindara,
file = paste0(IO$output_Rdata, 'uncertainties_distribution_kindara.Rdata'))
# CONFIDENCE
########################
by = 0.05
mids = seq(0,1,by = by)
breaks = c(mids[1]-by/2, mids + by/2)
confidence.hist = hist.cycles.par(file = cycle.file,
attr = 'confidence',
breaks = breaks,
filter = FALSE)## ../../FAM-Restricted-Access-Repo/Data/Kindara/Cycles/05 standard with ovu est CYCLES.Rdata
## 719182
## 523425
## 0.728
n.cycles = sum(sd.hist)
confidence.hist.norm = confidence.hist/n.cycles
confidence_scores_distribution_kindara = data.frame(confidence_score = mids,
fraction_of_cycles = confidence.hist.norm)
write.csv(confidence_scores_distribution_kindara,
file = paste0(IO$output_csv, 'confidence_scores_distribution_kindara.csv'),
row.names = FALSE)
save(confidence_scores_distribution_kindara,
file = paste0(IO$output_Rdata, 'confidence_scores_distribution_kindara.Rdata'))
rm(by, mids, breaks, n.cycles, confidence.hist, confidence.hist.norm, DT.hist, DT.hist.norm, uncertainties_distribution_kindara, confidence_scores_distribution_kindara, sd.hist, sd.hist.norm, CYCLES, cycle.file)source("Scripts/_setup_Sympto.R")file = paste0(IO$sympto_data_02_standard_cycles_with_HMM_res,"cycles.Rdata")
load(file, verbose = TRUE)## Loading objects:
## cycles
# DELTA T
########################
x = cycles$D.T[cycles$D.T >= 0]
h.DT = plot_hist(x = x, bin.size = 0.05,
at.x = seq(0,1,by = 0.1), at.y = seq(0,0.25,by = 0.05),
xlab = 'Temperature shifts',ylab = '% of cycles' )temperature_shift_distribution_sympto = data.frame(temperature_shifts = h.DT$mids,
fraction_of_cycles = h.DT$density)
write.csv(temperature_shift_distribution_sympto,
file = paste0(IO$output_csv, 'temperature_shift_distribution_sympto.csv'),
row.names = FALSE)
# SD on OVU ESTIMATION
########################
x = cycles$ovu.sd[cycles$ovu.sd >= 0]
h.sd = plot_hist(x = x, bin.size = 0.05,
at.x = seq(0,4,by = 0.5), at.y = seq(0,0.1,by = 0.02),
xlab = 'Uncertainty on ovu. estimates (in days)',ylab = '% of cycles' )uncertainties_distribution_sympto = data.frame(uncertainties = h.sd$mids,
fraction_of_cycles = h.sd$density)
write.csv(uncertainties_distribution_sympto,
file = paste0(IO$output_csv, 'uncertainties_distribution_sympto.csv'),
row.names = FALSE)
# CONFIDENCE
########################
x = cycles$confidence[cycles$confidence >= 0]
h.c = plot_hist(x = x, bin.size = 0.05,
at.x = seq(0,1,by = 0.2), at.y = seq(0,0.4,by = 0.05),
xlab = 'Confidence score on ovu. estimates',ylab = '% of cycles' )confidence_scores_distribution_sympto = data.frame(confidence_score = h.c$mids,
fraction_of_cycles = h.c$density)
write.csv(confidence_scores_distribution_sympto,
file = paste0(IO$output_csv, 'confidence_scores_distribution_sympto.csv'),
row.names = FALSE)source("Scripts/_setup_Kindara.R")days.folder.postHMM = paste0(IO$kindara_data_days,"05 post HMM/")
days.file.list = paste0(days.folder.postHMM,list.files(days.folder.postHMM))
breaks = c(0, seq(18,45,by = 3), Inf)
registerDoParallel(par$n_cores)
tic()
state_probs = foreach(day.file = days.file.list,.combine = combine.sum.state.probs) %dopar%
{compute.state.probs(file = day.file,
fun = 'sum',
breaks = breaks,
cycleday_lim = c(-40, 40))}
toc()## 77.547 sec elapsed
write.csv(state_probs,
file = paste0(IO$output_csv, 'states_probabilities_by_cycle_length_kindara.csv'),
row.names = FALSE)
save(state_probs,
file = paste0(IO$output_Rdata, 'states_probabilities_by_cycle_length_kindara.Rdata'))source("Scripts/_setup_Sympto.R")load(paste0(IO$sympto_data_02_standard_cycles_with_HMM_res,"cycledays.Rdata"), verbose = TRUE)
load(paste0(IO$sympto_data_03_reliable_ovulation_estimation,"cycles.Rdata"), verbose = TRUE)
m = match(cycledays$cycle_id, cycles$cycle_id)
cycledays = cycledays[!is.na(m),]
save(cycledays, file = paste0(IO$sympto_data_03_reliable_ovulation_estimation,"cycledays.Rdata") )load(paste0(IO$sympto_data_03_reliable_ovulation_estimation,"cycledays.Rdata"), verbose = TRUE)## Loading objects:
## cycledays
load(paste0(IO$sympto_data_03_reliable_ovulation_estimation,"cycles.Rdata"), verbose = TRUE)## Loading objects:
## cycles
breaks = c(11, seq(18,45,by = 3), 365)
cycle_length_bin = cut(cycledays$cycle_length, breaks = breaks)
cycledays$cycle_length_bin = cycle_length_bin
rm(cycle_length_bin)
j = (cycledays$cycleday_from_ovu >= -40) & (cycledays$cycleday_from_ovu <= 40)
agg = aggregate( x = cycledays[j,44:52],
by = list(cycle_length_bin = cycledays$cycle_length_bin[j],
cycleday_from_ovu = cycledays$cycleday_from_ovu[j]),FUN = sum)
agg_sum = aggregate(cycle_id ~ cycle_length_bin, cycledays[j,], function(x) length(unique(x)))
colnames(agg)[3:11] = paste0('state.',colnames(agg)[3:11])
agg$cycle_length_bin_num = as.numeric(agg$cycle_length_bin)
state_probs = reshape(agg, idvar = c("cycle_length_bin","cycle_length_bin_num","cycleday_from_ovu"),
times = c("hM" , "lM", "lE" , "hE", "O", "Rise" , "hP" , "Ep" , "lP"), timevar = "state",
varying = list(3:11), v.names = "prob", direction = "long")
state_probs$state = factor(state_probs$state, levels = c("hM" , "lM", "lE" , "hE", "O", "Rise" , "hP" , "Ep" , "lP"))
j = which(state_probs$cycle_length_bin_num %in% range(state_probs$cycle_length_bin_num))
state_probs = state_probs[-j,]
state_probs$prob_sum = state_probs$prob
m = match(state_probs$cycle_length_bin, agg_sum$cycle_length_bin)
state_probs$n_cycles = agg_sum$cycle_id[m]
state_probs$prob = state_probs$prob/state_probs$n_cycles
write.csv(state_probs,
file = paste0(IO$output_csv, 'states_probabilities_by_cycle_length_sympto.csv'),
row.names = FALSE)
save(state_probs,
file = paste0(IO$output_Rdata, 'states_probabilities_by_cycle_length_sympto.Rdata'))
g = ggplot(state_probs, aes(x = cycleday_from_ovu, y = prob, fill = state ))
g = g + geom_area(position = 'identity', alpha = 0.7) +
facet_grid(cycle_length_bin ~ ., scales = "free_y") +
scale_fill_manual(values = hmm_par$cycle.states.colors) +
xlab("Cycleday from estimated ovulation")+
ylab("Probabilities * Nb of cycles")
g + theme_hc() +
theme(strip.text.y = element_text(angle = 0),
strip.background = element_blank())